%% Empirical application (CPS data) - Canen and Song (2018)
%Based on draft version B18, Sep 28 - 2018

% Start from an empty workspace and a clean command window; fix the random
% seed so the bootstrap index draws made later in the script are reproducible.
clear
clc
rng(12123712)

% The variables used below (incwage, uhrsworkly, sex, college_grad) come from
% this MAT-file. An earlier CSV-based variant is kept for reference:
% cpsdat = csvread('empirical_CPS_20Dec2020_short.csv');
load('empirical_CPS_20Dec2020_short.mat');

% --- Test / bootstrap tuning constants ---
alpha   = 0.05;   % size of test
alpha1  = 0.005;  % Bonferroni
B       = 999;    % bootstrap repetitions (to find k_alpha)
kappa_n = 0.03;

% --- Linear model constants ---
top_coding = 0.05;          % amount of top coding
Z2 = 1e8;                   % assumed upper bound on salary1 + salary2
n  = size(incwage,1);       % number of rows of incwage

% --- Censoring level ---
% (earlier variant: wages = cpsdat(:,3) + cpsdat(:,4);)
% Z1 is the entry of the sorted incwage at rank ceil((1-top_coding)*n).
swages = sort(incwage);
censor_rank = ceil((1-top_coding)*n);
Z1 = swages(censor_rank,1);

% Build the two outcome columns from incwage:
%   rows with incwage >  Z1 : column 1 = Z1 (censoring level), column 2 = Z2 (assumed upper bound)
%   rows with incwage <= Z1 : both columns equal the observed incwage
% The array is preallocated as double and filled by masked assignment.
is_topcoded = incwage(:,1) > Z1;

data = zeros(n,2);
data(:,1) = incwage(:,1);
data(:,2) = incwage(:,1);
data(is_topcoded,1) = Z1;
data(is_topcoded,2) = Z2;

% Log of each outcome column divided by 52*uhrsworkly.
annual_hours = 52*uhrsworkly;
Z1t_aux = log(data(:,1)./annual_hours); %Y1
Z2t_aux = log(data(:,2)./annual_hours); %Y2

% Regressors and treatment.
X  = [ones(n,1), sex];   % constant and sex (Male = 1)
X1 = X(:,2:end);         % covariates without the constant
D  = college_grad;       % college (treatment)

% --- Grid settings ---
grid_jump    = 0.05;  % step used for every grid below
range_delta1 = 2;     % added to the OLS estimate to get the upper end of the delta_1 grid
range_delta2 = 1;     % added to the OLS estimate to get the upper end of the delta_2 grid
range_gamma  = 1;     % upper end of the gamma grid

% --- Parameter space (linear model) ---
d = size(X,2)+1; % constant, variables.

% Starting guess: OLS of Y1 on [X, D]; the three coefficients are read off in order.
theta = ([X,D]'*[X,D])\([X,D])'*Z1t_aux;
delta_1 = theta(1);
delta_2 = theta(2);
gamma   = theta(3);

% --- Grids ---
% The delta grids start strictly above 0 because of the log() parametrization
% (for delta_2: by extensive empirical evidence, male wages >= female wages).
delta1_grid = 0.1:grid_jump:(delta_1+range_delta1);
delta2_grid = 0.01:grid_jump:(delta_2+range_delta2);
% Upper end is a 100% return to college (above upper bound of Oreopoulos (2014) survey).
gamma_grid  = 0:grid_jump:range_gamma;
p_eta_grid  = 0.01:grid_jump:0.99;

% --- Bootstrap draws ---
% n-by-B matrix of integers drawn uniformly from 1..n.
X_order_B = randi(n,n,B);

% --- Indicators for each (X1, D) cell ---
is_male     = (sex==1);
is_female   = (sex==0);
is_grad     = (D==1);
is_not_grad = (D==0);

X1_eq_1_D_eq_1 = (is_male   & is_grad);
X1_eq_1_D_eq_0 = (is_male   & is_not_grad);
X1_eq_0_D_eq_1 = (is_female & is_grad);
X1_eq_0_D_eq_0 = (is_female & is_not_grad);

%% Inference

%Accumulators for the values of beta implied by grid points that are not rejected.
%These are parfor reduction variables: every iteration appends rows by vertical concatenation.
beta_ID_LF = [];   %non-rejected grid points, least-favorable (LF) critical value
beta_ID_RSW = [];  %non-rejected grid points, RSW critical value

beta_LRR_LF = [];  %as above, restricted to the LRR candidate set
beta_LRR_RSW = [];

%Loop-invariant quantities, computed once here and broadcast to the workers.
n_delta1 = size(delta1_grid,2);
n_delta2 = size(delta2_grid,2);
n_p_eta = size(p_eta_grid,2);
share_D = mean(D);
share_X1 = mean(X1);

%Loop across grid points of gamma (one parfor iteration per gamma value).
parfor l = 1:size(gamma_grid,2)
%Left unsuppressed on purpose: prints the current grid index.
l
    gamma = gamma_grid(l);

    %One cell per (delta_1, delta_2, p_eta) grid point for this gamma.
    %A cell stays empty unless the corresponding condition below holds.
    G_p_grid_kappa = cell(n_delta1,n_delta2,n_p_eta);
    G_p_grid_minuskappa = cell(n_delta1,n_delta2,n_p_eta);
    CI_temp_LF = cell(n_delta1,n_delta2,n_p_eta);
    CI_temp_RSW = cell(n_delta1,n_delta2,n_p_eta);

    for j = 1:n_delta1

        delta_1 = delta1_grid(j);

        for k = 1:n_delta2

            delta_2 = delta2_grid(k);

            for m = 1:n_p_eta

                p_eta = p_eta_grid(m);

                %Model value for each (X1, D) cell: p_eta*s + (1-p_eta)*log(s),
                %where s is the sum of the coefficients active in that cell.
                mu_11 = p_eta*(delta_1+delta_2+gamma) + (1-p_eta)*log(delta_1+delta_2+gamma);
                mu_10 = p_eta*(delta_1+delta_2) + (1-p_eta)*log(delta_1+delta_2);
                mu_01 = p_eta*(delta_1+gamma) + (1-p_eta)*log(delta_1+gamma);
                mu_00 = p_eta*(delta_1) + (1-p_eta)*log(delta_1);

                %Moment conditions: m1-m4 use Y1 minus the model value,
                %m5-m8 use the model value minus Y2, each within one (X1, D) cell.
                m1 = Z1t_aux.*X1_eq_1_D_eq_1 - mu_11.*X1_eq_1_D_eq_1;
                m2 = Z1t_aux.*X1_eq_1_D_eq_0 - mu_10.*X1_eq_1_D_eq_0;
                m3 = Z1t_aux.*X1_eq_0_D_eq_1 - mu_01.*X1_eq_0_D_eq_1;
                m4 = Z1t_aux.*X1_eq_0_D_eq_0 - mu_00.*X1_eq_0_D_eq_0;

                m5 = mu_11.*X1_eq_1_D_eq_1 - Z2t_aux.*X1_eq_1_D_eq_1;
                m6 = mu_10.*X1_eq_1_D_eq_0 - Z2t_aux.*X1_eq_1_D_eq_0;
                m7 = mu_01.*X1_eq_0_D_eq_1 - Z2t_aux.*X1_eq_0_D_eq_1;
                m8 = mu_00.*X1_eq_0_D_eq_0 - Z2t_aux.*X1_eq_0_D_eq_0;

                %Average moment (mean over n), as a 1x8 row vector.
                mbar = [mean(m1),mean(m2),mean(m3),mean(m4),mean(m5),mean(m6),mean(m7),mean(m8)];

                %Variance of each moment (divides by n), as a 1x8 row vector.
                sigma_hat2_temp = [sum((m1-mean(m1)).^2)/size(m1,1), ...
                                   sum((m2-mean(m2)).^2)/size(m2,1), ...
                                   sum((m3-mean(m3)).^2)/size(m3,1), ...
                                   sum((m4-mean(m4)).^2)/size(m4,1), ...
                                   sum((m5-mean(m5)).^2)/size(m5,1), ...
                                   sum((m6-mean(m6)).^2)/size(m6,1), ...
                                   sum((m7-mean(m7)).^2)/size(m7,1), ...
                                   sum((m8-mean(m8)).^2)/size(m8,1)];

                %Studentized moments and the positive-part criteria,
                %shifted by +kappa_n, 0 and -kappa_n.
                mbar_std = mbar./sqrt(sigma_hat2_temp);
                Qhat_kappa = sum(max(mbar_std+kappa_n,0));
                Qhat_0 = sum(max(mbar_std,0));
                Qhat_minuskappa = sum(max(mbar_std-kappa_n,0));

                %Test Statistic
                T_beta_temp = sqrt(n)*Qhat_0;

                %Critical value, least-favorable case (external helper, not visible in this file).
                c_1minusalpha_LF = find_crit_LF_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,X1',D,X_order_B,B,sigma_hat2_temp,delta_1,delta_2,gamma,p_eta,alpha);

                %First-step constant for RSW at level alpha1 (external helper, not visible in this file).
                k1 = find_kalpha_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,X1',D,X_order_B,B,sigma_hat2_temp,delta_1,delta_2,gamma,p_eta,alpha1);

                %Constructing lambda, one entry per moment.
                %sigma_hat2_temp is a row vector, so its element-wise square root is the
                %per-moment standard deviation. Wrapping it in diag() would build an 8x8
                %matrix whose linear indices 2..8 are zero, which drops the k1 term for
                %every moment except the first.
                aux = sqrt(sigma_hat2_temp);
                lambda = min(mbar-k1*aux/sqrt(n),0);

                %Critical value, RSW (external helper, not visible in this file).
                c_1minusalpha_RSW = find_crit_RSW_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,X1',D,X_order_B,B,sigma_hat2_temp,delta_1,delta_2,gamma,p_eta,lambda,alpha,alpha1);

                %A grid point is kept when the test statistic does not exceed the critical value.
                Conf_temp_LF = (T_beta_temp<=c_1minusalpha_LF);
                Conf_temp_RSW = (T_beta_temp<=c_1minusalpha_RSW);

                if Conf_temp_LF==1
                    CI_temp_LF{j,k,m} = [delta_1, delta_2, gamma, p_eta];
                    delta_temp = [delta_1; delta_2];
                    beta_temp = p_eta*gamma+(1-p_eta)*mean(log((X*delta_temp)+gamma)-log(X*delta_temp));
                    beta_ID_LF = [beta_ID_LF; beta_temp];
                end

                if Conf_temp_RSW==1
                    CI_temp_RSW{j,k,m} = [delta_1, delta_2, gamma, p_eta];
                    delta_temp = [delta_1; delta_2];
                    beta_temp = p_eta*gamma+(1-p_eta)*mean(log((X*delta_temp)+gamma)-log(X*delta_temp));
                    beta_ID_RSW = [beta_ID_RSW; beta_temp];
                end

                %Store values of parameters satisfying Qhat_kappa = 0, Qhat_minuskappa = 0 (needed to find LRR)
                if Qhat_kappa == 0
                    G_p_grid_kappa{j,k,m} = [delta_1, delta_2, gamma, p_eta];
                end

                if Qhat_minuskappa == 0
                    G_p_grid_minuskappa{j,k,m} = [delta_1, delta_2, gamma, p_eta];
                end
            end
        end
    end

    %Drop empty cells and stack the remaining 1x4 rows into matrices
    %(columns: delta_1, delta_2, gamma, p_eta). The result is [] when nothing was stored.
    CI_temp_LF = CI_temp_LF(~cellfun('isempty',CI_temp_LF));
    CI_temp_LF = cell2mat(CI_temp_LF);

    CI_temp_RSW = CI_temp_RSW(~cellfun('isempty',CI_temp_RSW));
    CI_temp_RSW = cell2mat(CI_temp_RSW);

    G_p_grid_minuskappa = G_p_grid_minuskappa(~cellfun('isempty',G_p_grid_minuskappa));
    G_p_grid_minuskappa = cell2mat(G_p_grid_minuskappa);

    G_p_grid_kappa = G_p_grid_kappa(~cellfun('isempty',G_p_grid_kappa));
    G_p_grid_kappa = cell2mat(G_p_grid_kappa);

    %Candidate parameter rows for the LRR step (for this gamma grid point).
    %Default: all rows with Qhat_minuskappa == 0. This is what is used when no
    %row satisfies Qhat_kappa == 0; otherwise it is replaced below.
    LRR_candidates = G_p_grid_minuskappa;

    if isempty(G_p_grid_kappa)==0

        %LRR objective at every row of G_p_grid_kappa.
        %X1 = 1, D = 1
        g_11 = G_p_grid_kappa(:,1)+G_p_grid_kappa(:,2)+G_p_grid_kappa(:,3);
        term1 = g_11-log(g_11);

        %X1 = 1, D = 0
        g_10 = G_p_grid_kappa(:,1)+G_p_grid_kappa(:,2);
        term2 = g_10-log(g_10);

        %X1 = 0, D = 1
        g_01 = G_p_grid_kappa(:,1)+G_p_grid_kappa(:,3);
        term3 = g_01-log(g_01);

        %X1 = 0, D = 0
        g_00 = G_p_grid_kappa(:,1);
        term4 = g_00-log(g_00);

        %Squared terms weighted by products of the sample means of D and X1 (row vector).
        Q_LRR = (share_D*share_X1*(term1.^2) + share_X1*(1-share_D)*(term2.^2) ...
               + share_D*(1-share_X1)*(term3.^2) + (1-share_D)*(1-share_X1)*(term4.^2)).';

        %Rows of G_p_grid_kappa that also appear in G_p_grid_minuskappa.
        %aux_find indexes rows of G_p_grid_kappa.
        [~,aux_find] = intersect(G_p_grid_kappa,G_p_grid_minuskappa,'rows');
        Q_LRR_kU_minuskappa = Q_LRR(aux_find);

        %Keep the common rows whose objective is within 2*kappa_n of the minimum.
        LRR_minimizers = (Q_LRR_kU_minuskappa<=min(Q_LRR)+2*kappa_n);

        %The mask is aligned with aux_find, so it must select rows of G_p_grid_kappa
        %through aux_find. Applying it directly to G_p_grid_minuskappa would pick
        %unrelated rows, since that matrix has a different row order.
        LRR_candidates = G_p_grid_kappa(aux_find(LRR_minimizers),:);
    end

    if isempty(LRR_candidates)==0

        %LF: grid points that are both non-rejected and LRR candidates.
        %The emptiness guard avoids calling intersect(...,'rows') with an empty
        %matrix on one side and a 4-column matrix on the other.
        if isempty(CI_temp_LF)==0
            LRR_set = intersect(CI_temp_LF,LRR_candidates,'rows');

            %Parameter of interest for each retained row.
            for r = 1:size(LRR_set,1)
                delta_temp = [LRR_set(r,1); LRR_set(r,2)];
                beta_temp = LRR_set(r,4)*LRR_set(r,3)+(1-LRR_set(r,4))*mean(log(X*delta_temp+LRR_set(r,3))-log(X*delta_temp));
                beta_LRR_LF = [beta_LRR_LF; beta_temp];
            end
        end

        %RSW: same construction with the RSW non-rejected points.
        if isempty(CI_temp_RSW)==0
            LRR_set = intersect(CI_temp_RSW,LRR_candidates,'rows');

            for r = 1:size(LRR_set,1)
                delta_temp = [LRR_set(r,1); LRR_set(r,2)];
                beta_temp = LRR_set(r,4)*LRR_set(r,3)+(1-LRR_set(r,4))*mean(log(X*delta_temp+LRR_set(r,3))-log(X*delta_temp));
                beta_LRR_RSW = [beta_LRR_RSW; beta_temp];
            end
        end
    end

end

%% Results

% Distinct accumulated values in ascending order (unique already returns them
% sorted), with any entry equal to the placeholder value 999 filtered out.
beta_ID_LF_final = unique(beta_ID_LF);
beta_ID_LF_final = beta_ID_LF_final(beta_ID_LF_final~=999);

beta_ID_RSW_final = unique(beta_ID_RSW);
beta_ID_RSW_final = beta_ID_RSW_final(beta_ID_RSW_final~=999);

beta_LRR_LF_final = unique(beta_LRR_LF);
beta_LRR_LF_final = beta_LRR_LF_final(beta_LRR_LF_final~=999);

beta_LRR_RSW_final = unique(beta_LRR_RSW);
beta_LRR_RSW_final = beta_LRR_RSW_final(beta_LRR_RSW_final~=999);

% Report [min, max] of each set divided by 4. The expressions are left
% unsuppressed so the intervals are echoed to the command window.
disp('Final beta ID: LF, RSW')
[min(beta_ID_LF_final),max(beta_ID_LF_final)]/4
[min(beta_ID_RSW_final),max(beta_ID_RSW_final)]/4


disp('Final beta LRR')
[min(beta_LRR_LF_final),max(beta_LRR_LF_final)]/4
[min(beta_LRR_RSW_final),max(beta_LRR_RSW_final)]/4

%% Save

% Keep only the raw accumulators and the settings, then save the workspace to a
% MAT-file whose name encodes kappa_n, B, top_coding, Z2 and range_gamma.
clearvars -except beta_LRR_LF beta_LRR_RSW beta_ID_LF beta_ID_RSW kappa_n alpha alpha1 Z1 Z2 top_coding B grid_jump range_gamma
DiaryName = strcat('FinalEmpirical', ...
                   '_kappa', num2str(kappa_n), ...
                   '_B', num2str(B), ...
                   '_topcoding', num2str(top_coding), ...
                   '_2Z', num2str(Z2), ...
                   'gamma_grid', num2str(range_gamma), ...
                   '.mat');

save(DiaryName)